library(slingshot)
## Loading required package: princurve
library(RColorBrewer)
library(mgcv)
## Loading required package: nlme
## This is mgcv 1.8-23. For overview type 'help("mgcv-package")'.
library(tradeR)
library(edgeR)
## Warning: package 'edgeR' was built under R version 3.5.1
## Loading required package: limma
## Warning: package 'limma' was built under R version 3.5.1
##
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
library(rafalib)
library(wesanderson)
data <- readRDS("~/Dropbox/PhD/Research/singleCell/trajectoryInference/trajectoryDE/tradeRPaper/simulation/sim2_dyntoy_bifurcating_4/bifurcating_4.rds")
counts <- t(data$counts)
falseGenes <- data$feature_info$feature_id[!data$feature_info$housekeeping]
# this dataset has no null genes.
set.seed(5)
null1 <- t(apply(counts,1,sample))
dimnames(null1) <- list(paste0("H",1:501),paste0("C",1:2011))
null2 <- t(apply(counts,1,sample))
dimnames(null2) <- list(paste0("H",502:1002),paste0("C",1:2011))
null3 <- t(apply(counts,1,sample))
dimnames(null3) <- list(paste0("H",1003:1503),paste0("C",1:2011))
counts <- rbind(counts,null1,null2,null3)
nullGenes <- rownames(counts)[substr(rownames(counts),1,1)=="H"]
pal <- wes_palette("Zissou1", 12, type = "continuous")
truePseudotime <- data$prior_information$timecourse_continuous[colnames(counts)]
g <- Hmisc::cut2(truePseudotime,g=12)
# quantile normalization
FQnorm <- function(counts){
rk <- apply(counts,2,rank,ties.method='min')
counts.sort <- apply(counts,2,sort)
refdist <- apply(counts.sort,1,median)
norm <- apply(rk,2,function(r){ refdist[r] })
rownames(norm) <- rownames(counts)
return(norm)
}
normCounts <- FQnorm(counts)
## dim red
pca <- prcomp(log1p(t(normCounts)), scale. = FALSE)
rd <- pca$x[,1:3]
plot(rd, pch=16, asp = 1)

set.seed(9)
cl <- kmeans(rd, centers = 8)$cluster
plot(rd, col = brewer.pal(9,"Set1")[cl], pch=16, asp = 1)
legend("topleft",legend=as.character(1:7),col=brewer.pal(9,"Set1")[1:7],pch=16,cex=2/3,bty='n')

plot(rd, col = pal[g], pch=16, asp = 1)
#lineages
lin <- getLineages(rd, cl, start.clus=6, end.clus=c(5,2))
## Using full covariance matrix
plot(rd, col = pal[g], pch=16, asp = 1)
lines(lin,lwd=2)

#curves
crv <- getCurves(lin)
plot(rd, col = pal[g], pch=16, asp = 1)
lines(crv, lwd=2, col="black")

# milestone ID
gid <- data$prior_information$groups_id
gid <- gid[match(colnames(counts),gid$cell_id),]
plot(rd, col = as.numeric(as.factor(gid$group_id))+1, pch=16, asp = 1)

fit smoothers on raw data
cWeights <- slingCurveWeights(crv)
pseudoT <- slingPseudotime(crv, na=FALSE)
gamList <- fitGAM(counts, pseudotime=pseudoT, cellWeights=cWeights, verbose=FALSE)
Test for differences at end point
endRes <- diffEndTest(gamList)
hist(endRes$pvalue)

deGenesEndGam <- rownames(counts)[which(p.adjust(endRes$pvalue,"fdr")<=0.05)]
mean(falseGenes%in%deGenesEndGam) #TPR
## [1] 0.6626747
mean(!deGenesEndGam%in%falseGenes) #FDR
## [1] 0
edgeR analysis on final clusters
clF <- as.factor(cl)
design <- model.matrix(~clF)
library(edgeR)
d <- DGEList(counts)
d <- calcNormFactors(d)
d <- estimateDisp(d, design)
fit <- glmFit(d, design)
lrt <- glmLRT(fit, coef="clF2") #cluster 2 vs cluster 1 (end clusters)
deGenesEdgeR <- rownames(lrt$table)[p.adjust(lrt$table$PValue,"fdr")<=0.05]
mean(falseGenes%in%deGenesEdgeR) #TPR
## [1] 0.7964072
mean(deGenesEdgeR%in%nullGenes) #FDR
## [1] 0.007462687
compare GAM end point vs edgeR analysis
library(rafalib)
mypar(mfrow=c(1,1))
## compare
edgeR <- p.adjust(lrt$table$PValue,"fdr")<=0.05
tradeR <- p.adjust(endRes$pval,"fdr")<=0.05
vennC <- cbind(edgeR,tradeR)
vennDiagram(vennC)

Test for different expression pattern
resPattern <- patternTest(gamList)
hist(resPattern$pval)

deGenesPattern <- rownames(counts)[which(p.adjust(resPattern$pval,"fdr")<=0.05)]
length(deGenesPattern)
## [1] 277
mean(falseGenes%in%deGenesPattern) #TPR
## [1] 0.5528942
mean(!deGenesPattern%in%falseGenes) #FDR
## [1] 0
Monocle BEAM analysis
### old monocle BEAM analysis
library(monocle,lib.loc="~/Dropbox/PhD/Research/singleCell/trajectoryInference/trajectoryDE/methodsPaper/")
## Loading required package: Matrix
## Loading required package: ggplot2
## Loading required package: VGAM
## Loading required package: stats4
## Loading required package: splines
##
## Attaching package: 'VGAM'
## The following object is masked from 'package:mgcv':
##
## s
## Loading required package: DDRTree
## Loading required package: irlba
featureInfo <- data.frame(gene_short_name=rownames(counts))
rownames(featureInfo) <- rownames(counts)
fd <- new("AnnotatedDataFrame", featureInfo)
cds <- newCellDataSet(cellData=counts, featureData=fd, expressionFamily=negbinomial.size())
cds <- estimateSizeFactors(cds)
cds <- estimateDispersions(cds)
## Removing 154 outliers
cds <- reduceDimension(cds)#, max_components = 4, method = 'ICA')
cds <- orderCells(cds)#, num_paths=2)
plot_cell_trajectory(cds, color_by = "State")

# monocle does not find a branching point on 2 components! specify 4.
BEAM_res <- BEAM(cds, cores = 1)
## Warning in if (progenitor_method == "duplicate") {: the condition has
## length > 1 and only the first element will be used
## Warning in if (progenitor_method == "sequential_split") {: the condition
## has length > 1 and only the first element will be used
sum(BEAM_res$qval<0.05)
## [1] 404
tradeR downstream of Monocle 2
phenoData(cds)$group_id <- gid$group_id
plot_cell_trajectory(cds, color_by = "group_id")

plot(rd, col = as.numeric(as.factor(gid$group_id))+1, pch=16, asp = 1) ; legend("topleft", paste0("M",1:4), col=2:5,pch=16)

# Milestone 2 and 4 are the two branches to be compared.
ptMon <- matrix(phenoData(cds)$Pseudotime, nrow=ncol(counts), ncol=2, byrow=FALSE)
state <- phenoData(cds)$State
cellWeightsMon <- matrix(0, nrow=ncol(counts), ncol=2)
cellWeightsMon[state==1,] <- c(1/2,1/2)
cellWeightsMon[state==3,1] <- 1
cellWeightsMon[state==2,2] <- 1
gamListMon <- fitGAM(counts, pseudotime=ptMon, cellWeights=cellWeightsMon, verbose=TRUE)
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|==== | 7%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|====== | 10%
|
|======= | 10%
|
|======= | 11%
|
|======= | 12%
|
|======== | 12%
|
|======== | 13%
|
|========= | 13%
|
|========= | 14%
|
|========= | 15%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 16%
|
|=========== | 17%
|
|=========== | 18%
|
|============ | 18%
|
|============ | 19%
|
|============= | 19%
|
|============= | 20%
|
|============= | 21%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 22%
|
|=============== | 23%
|
|=============== | 24%
|
|================ | 24%
|
|================ | 25%
|
|================= | 25%
|
|================= | 26%
|
|================= | 27%
|
|================== | 27%
|
|================== | 28%
|
|=================== | 28%
|
|=================== | 29%
|
|=================== | 30%
|
|==================== | 30%
|
|==================== | 31%
|
|==================== | 32%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 33%
|
|====================== | 34%
|
|====================== | 35%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 36%
|
|======================== | 37%
|
|======================== | 38%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 39%
|
|========================== | 40%
|
|========================== | 41%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 42%
|
|============================ | 43%
|
|============================ | 44%
|
|============================= | 44%
|
|============================= | 45%
|
|============================== | 45%
|
|============================== | 46%
|
|============================== | 47%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================ | 50%
|
|================================= | 50%
|
|================================= | 51%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 53%
|
|=================================== | 54%
|
|=================================== | 55%
|
|==================================== | 55%
|
|==================================== | 56%
|
|===================================== | 56%
|
|===================================== | 57%
|
|===================================== | 58%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 59%
|
|======================================= | 60%
|
|======================================= | 61%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 62%
|
|========================================= | 63%
|
|========================================= | 64%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 65%
|
|=========================================== | 66%
|
|=========================================== | 67%
|
|============================================ | 67%
|
|============================================ | 68%
|
|============================================= | 68%
|
|============================================= | 69%
|
|============================================= | 70%
|
|============================================== | 70%
|
|============================================== | 71%
|
|============================================== | 72%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 73%
|
|================================================ | 74%
|
|================================================ | 75%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 76%
|
|================================================== | 77%
|
|================================================== | 78%
|
|=================================================== | 78%
|
|=================================================== | 79%
|
|==================================================== | 79%
|
|==================================================== | 80%
|
|==================================================== | 81%
|
|===================================================== | 81%
|
|===================================================== | 82%
|
|====================================================== | 82%
|
|====================================================== | 83%
|
|====================================================== | 84%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 85%
|
|======================================================== | 86%
|
|======================================================== | 87%
|
|========================================================= | 87%
|
|========================================================= | 88%
|
|========================================================== | 88%
|
|========================================================== | 89%
|
|========================================================== | 90%
|
|=========================================================== | 90%
|
|=========================================================== | 91%
|
|=========================================================== | 92%
|
|============================================================ | 92%
|
|============================================================ | 93%
|
|============================================================= | 93%
|
|============================================================= | 94%
|
|============================================================= | 95%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 96%
|
|=============================================================== | 97%
|
|=============================================================== | 98%
|
|================================================================ | 98%
|
|================================================================ | 99%
|
|=================================================================| 99%
|
|=================================================================| 100%
resPatternMon <- patternTest(gamListMon)
resEndMon <- diffEndTest(gamListMon)
tradeR on true pseudotime
### tradeR on true pseudotime
pst <- matrix(truePseudotime, nrow=ncol(counts),ncol=2, byrow=FALSE)
gamListTrueTime <- fitGAM(counts, pseudotime=pst, cellWeights=slingCurveWeights(crv), verbose=FALSE)
# end point Test
waldEndPointResTrueTime <- diffEndTest(gamListTrueTime)
padjWaldTrueTime <- p.adjust(waldEndPointResTrueTime$pvalue,"fdr")
sum(padjWaldTrueTime<=0.05)
## [1] 297
# pattern test
patternResTrueTime <- patternTest(gamListTrueTime)
padjPatternTrueTime <- p.adjust(patternResTrueTime$pvalue,"fdr")
sum(padjPatternTrueTime<=0.05, na.rm=TRUE)
## [1] 219
GPfates
# export
logCpm <- edgeR::cpm(counts, prior.count=.125, log=TRUE)
sampleInfo <- data.frame(global_pseudotime=truePseudotime)
rownames(sampleInfo) <- colnames(counts)
write.table(logCpm, file="~/Dropbox/PhD/Research/singleCell/trajectoryInference/trajectoryDE/tradeRPaper/simulation/sim2_dyntoy_bifurcating_4/simDyntoyLogCpm.txt", row.names=TRUE, col.names=TRUE, quote=FALSE)
write.table(sampleInfo, file="~/Dropbox/PhD/Research/singleCell/trajectoryInference/trajectoryDE/tradeRPaper/simulation/sim2_dyntoy_bifurcating_4/sampleInfoSimDyntoy.txt", row.names=TRUE, col.names=TRUE, quote=FALSE)
# run GPfates python script
GPfatesWeights <- read.table("~/Dropbox/PhD/Research/singleCell/trajectoryInference/trajectoryDE/tradeRPaper/simulation/sim2_dyntoy_bifurcating_4/GPfatesWeights.txt", header=FALSE)
GPfatesBif <- read.table("~/Dropbox/PhD/Research/singleCell/trajectoryInference/trajectoryDE/tradeRPaper/simulation/sim2_dyntoy_bifurcating_4/GPfatesBifStats.txt", header=FALSE)
colnames(GPfatesBif) <- c("bif_ll", "amb_ll", "shuff_bif_ll", "shuff_amb_ll", "phi0_corr", "D", "shuff_D")
GPfates weights + tradeR
# based on true pseudotime
gamListGPfatesTrueTime <- fitGAM(counts, pseudotime=pst, cellWeights=GPfatesWeights, verbose=FALSE)
# end point test
waldEndPointResTrueTimeGPfates <- diffEndTest(gamListGPfatesTrueTime)
# pattern test
patternResTrueTimeGPfates <- patternTest(gamListGPfatesTrueTime)
plot the false genes
pdf("~/Dropbox/PhD/Research/singleCell/trajectoryInference/trajectoryDE/tradeRPaper/simulation/sim2_dyntoy_bifurcating_4/falseGenesTruePseudotime.pdf")
for(i in 1:length(falseGenes)){
plotSmoothers(gamListTrueTime[[falseGenes[i]]])
}
dev.off()
## quartz_off_screen
## 2
plot the null genes (which are all permuted)
pdf("~/Dropbox/PhD/Research/singleCell/trajectoryInference/trajectoryDE/tradeRPaper/simulation/sim2_dyntoy_bifurcating_4/nullGenesTruePseudotime.pdf")
for(i in 1:length(nullGenes)){
plotSmoothers(gamListTrueTime[[nullGenes[i]]])
}
dev.off()
## quartz_off_screen
## 2
plot example genes
mypar(mfrow=c(1,2))
i=1
plotSmoothers(gamListTrueTime[[falseGenes[i]]])
plotSmoothers(gamListTrueTime[[nullGenes[i]]])

Performance plots
########
# FDP-TPR
########
library(iCOBRA)
##
## Attaching package: 'iCOBRA'
## The following objects are masked from 'package:BiocGenerics':
##
## score, score<-
library(scales)
truth <- as.data.frame(matrix(rep(0,nrow(counts)), dimnames=list(rownames(counts),"status")))
truth[falseGenes,"status"] <- 1
cols <- hue_pal()(10)
names(cols) <- c("BEAM", "GPfates", "edgeR", "tradeR_slingshot_end", "tradeR_slingshot_pattern", "tradeR_GPfates_end", "tradeR_GPfates_pattern", "tradeR_Monocle2_end", "tradeR_Monocle2_pattern")
### estimated pseudotime
pval <- data.frame( tradeR_slingshot_end=endRes$pval,
tradeR_slingshot_pattern=resPattern$pval,
BEAM=BEAM_res$pval,
edgeR=lrt$table$PValue,
tradeR_Monocle2_end=resEndMon$pvalue,
tradeR_Monocle2_pattern=resPatternMon$pvalue,
row.names=rownames(counts))
cobra <- COBRAData(pval=pval, truth=truth)
cobra <- calculate_adjp(cobra)
## Object up to date
cobra <- calculate_performance(cobra, binary_truth="status")
cobraplot <- prepare_data_for_plot(cobra, colorscheme=cols[match(sort(unique(cobra@roc$method)),names(cols))])
## Warning in define_colors(cobraperf = cobraperf, palette = colorscheme,
## facetted = facetted, : too few colors provided, 1 random colors will be
## added
plot_roc(cobraplot)

plot_fdrtprcurve(cobraplot, pointsize=0, yaxisrange=c(0.6,1))
## Warning: Removed 1231 rows containing missing values (geom_path).
## Warning: Removed 7 rows containing missing values (geom_point).

pEst <- plot_fdrtprcurve(cobraplot, pointsize=0, yaxisrange=c(0.6,1), xaxisrange=c(0,0.5))
### true pseudotime
pval <- data.frame( tradeR_slingshot_end=waldEndPointResTrueTime$pval,
tradeR_slingshot_pattern=patternResTrueTime$pval,
BEAM=BEAM_res$pval,
edgeR=lrt$table$PValue,
tradeR_GPfates_end=waldEndPointResTrueTimeGPfates$pval,
tradeR_GPfates_pattern=patternResTrueTimeGPfates$pval,
row.names=rownames(counts))
score <- data.frame(GPfates=GPfatesBif$D,
row.names=rownames(counts))
cobra <- COBRAData(pval=pval, truth=truth, score=score)
cobra <- calculate_adjp(cobra)
## Object up to date
cobra <- calculate_performance(cobra, binary_truth="status")
## column GPfates is being ignored for NBRS calculations
## column GPfates is being ignored for TPR calculations
## column GPfates is being ignored for FDR calculations
## column GPfates is being ignored for FPR calculations
cobraplot <- prepare_data_for_plot(cobra, colorscheme=cols[match(sort(unique(cobra@roc$method)),names(cols))])
## Warning in define_colors(cobraperf = cobraperf, palette = colorscheme,
## facetted = facetted, : too few colors provided, 1 random colors will be
## added
plot_roc(cobraplot)

plot_fdrtprcurve(cobraplot, pointsize=0, yaxisrange=c(0.6,1), xaxisrange=c(0,0.5))
## Warning: Removed 9811 rows containing missing values (geom_path).
## Warning: Removed 10 rows containing missing values (geom_point).

pTrue <- plot_fdrtprcurve(cobraplot, pointsize=0, yaxisrange=c(0.6,1), xaxisrange=c(0,0.5))
#
library(cowplot)
## Warning: package 'cowplot' was built under R version 3.5.2
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
prow <- plot_grid( pEst + theme(legend.position="none") + xlab("FDP") + ggtitle("Based on estimated pseudotime"),
pTrue + theme(legend.position="none") + xlab("FDP") + ggtitle("Based on true pseudotime"),
align = 'vh',
labels = c("a", "b"),
hjust = -1,
nrow = 1
)
## Warning: Removed 7542 rows containing missing values (geom_path).
## Warning: Removed 7 rows containing missing values (geom_point).
## Warning: Removed 9811 rows containing missing values (geom_path).
## Warning: Removed 10 rows containing missing values (geom_point).
legend_a <- get_legend(pTrue + theme(legend.position="bottom"))
## Warning: Removed 9811 rows containing missing values (geom_path).
## Warning: Removed 10 rows containing missing values (geom_point).
p <- plot_grid( prow, legend_a, ncol = 1, rel_heights = c(1, .2))
p

# png("~/Dropbox/PhD/Research/singleCell/trajectoryInference/trajectoryDE/tradeRPaper/simulation/sim2sim2_dyntoy_bifurcating_4/performance_sim2Dyntoy.png", width=7,height=6, units="in", res=300)
# p
# dev.off()
#composite one plot
pval <- data.frame( tradeR_slingshot_end=endRes$pval,
tradeR_slingshot_pattern=resPattern$pval,
BEAM=BEAM_res$pval,
edgeR=lrt$table$PValue,
tradeR_GPfates_end=waldEndPointResTrueTimeGPfates$pval,
tradeR_GPfates_pattern=patternResTrueTimeGPfates$pval,
tradeR_Monocle2_end=resEndMon$pvalue,
tradeR_Monocle2_pattern=resPatternMon$pvalue,
row.names=rownames(counts))
score <- data.frame(GPfates=GPfatesBif$D,
row.names=rownames(counts))
cobra <- COBRAData(pval=pval, truth=truth, score=score)
saveRDS(cobra,"~/Dropbox/PhD/Research/singleCell/trajectoryInference/trajectoryDE/tradeRPaper/simulation/sim2_dyntoy_bifurcating_4/cobra.rds")
cobra <- calculate_adjp(cobra)
## Object up to date
cobra <- calculate_performance(cobra, binary_truth="status")
## column GPfates is being ignored for NBRS calculations
## column GPfates is being ignored for TPR calculations
## column GPfates is being ignored for FDR calculations
## column GPfates is being ignored for FPR calculations
cobraplot <- prepare_data_for_plot(cobra, colorscheme=cols[match(sort(unique(cobra@roc$method)),names(cols))])
## Warning in define_colors(cobraperf = cobraperf, palette = colorscheme,
## facetted = facetted, : too few colors provided, 1 random colors will be
## added
plot_roc(cobraplot)

plot_fdrtprcurve(cobraplot, pointsize=0, yaxisrange=c(0.8,1), xaxisrange=c(0,0.5))
## Warning: Removed 13269 rows containing missing values (geom_path).
## Warning: Removed 22 rows containing missing values (geom_point).

# pAll <- plot_fdrtprcurve(cobraplot, pointsize=3/2, yaxisrange=c(0.8,1), xaxisrange=c(0,0.5))
# png("~/Dropbox/PhD/Research/singleCell/trajectoryInference/trajectoryDE/plots/performanceSim1All.png", width=7,height=6, units="in", res=300)
# pAll
# dev.off()